Description | Symbol | Value | Unit |
|---|---|---|---|
RNA volume used in cDNA synthesis | VOL_RNA_IN_CDNA | 15 | µL |
Total volume of cDNA synthesis reaction | TOTAL_VOL_CDNA | 20 | µL |
cDNA volume used in ddPCR reaction | VOL_CDNA_IN_PCR | 8 | µL |
Total volume of ddPCR reaction | VOL_PCR_RXN | 20 | µL |
ddPCR HIV RNA Processing
Equations
\[ \text{1. cDNA Synthesis Concentration: } C_{cDNA} = \frac{C_{RNA} \times V_{RNA\_in\_cDNA}}{V_{Total\_cDNA\_rxn}} \]
\[ \text{2. cDNA Mass per Well: } Mass_{well} = \left( C_{cDNA} \times \frac{V_{cDNA\_in\_PCR}}{V_{PCR\_rxn}} \right) \times V_{PCR\_rxn} \]
\[ \text{3. Final Normalization: } \frac{Copies}{1000ng} = \frac{Copies_{well}}{Mass_{well}} \times 1000 \]
Assay Parameters (to adjust)
Load Map Files
Show the R code
# --- Define Paths ---
map_dir <- "/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/data/2026-02-25/"
map_files <- list.files(map_dir, pattern = "*.xlsx", full.names = TRUE)
# map_files
#
map_df1 <- map_files %>%
# 0. Name the vector so map_df can create an ID column from the file paths
set_names() %>%
map_df(~read_xlsx(.x, sheet = " cDNA synthesis calculation", skip = 1), .id = "source_file") %>%
# 1. Handle filters
filter(!is.na(`Patient ID`)) %>%
# 2. Extract Metadata from filename & Recode
mutate(
# Extracts the first part (e.g., BR-2967)
run_id = str_split(basename(source_file), " ", simplify = TRUE)[,1],
# Extracts the date at the end (e.g., 12-16-2024) and converts to Date object
# This regex looks for MM-DD-YYYY before the .xlsx extension
run_date = str_extract(basename(source_file), "\\d{1,2}-\\d{1,2}-\\d{4}") %>%
mdy(),
`ddPCR ID#` = as.character(`ddPCR #`),
RNA_vol_ul = `Sample Volume for RNA synthesis (ul)`,
RNA_conc_ng_ul = `RNA conc (ng/ul) Qubit`,
`DNA template used` = (1000 / 7.1) * RNA_vol_ul / RNA_conc_ng_ul
) %>%
# 3. Select and Standardize
select(
run_id,
run_date,
`ddPCR ID#`,
GlobalSpecimenID = `Patient ID`,
RNA_vol_ul,
RNA_conc_ng_ul,
run = `ddPCR #`
)
# dput(unique(sort(map_df1$GlobalSpecimenID)))
rna_raw <- readRDS(paste0("/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/outputs/rna_raw_unlabeled.RDS"))
# glimpse(rna_raw)
map_df2 <- rna_raw|>dplyr::select(rna_ddpcr_runid,rna_ddpcr_id,pid,rna_spec_abbr,collect_date)|>
dplyr::rename(run=rna_ddpcr_runid,
`GlobalSpecimenID`=rna_ddpcr_id,
acronyms=rna_spec_abbr,
sampling_date=collect_date)
map_df <- map_df2|>dplyr::left_join(map_df1|>dplyr::select(-run),by=c("GlobalSpecimenID" ="GlobalSpecimenID","run"="run_id"))|>
dplyr::filter(!is.na(RNA_vol_ul))
num_cols <- names(map_df)[sapply(map_df, is.numeric)]
datatable(
map_df,
caption = 'Mapping Samples',
filter = 'top',
options = list(pageLength = 10, autoWidth = TRUE, scrollX = TRUE),
rownames = FALSE
) %>%
formatRound(columns = num_cols, digits = 2)Process ddPCR Raw Data
Show the R code
ddpcr_path<- "/Users/antoinechaillon/Dropbox/WITHINHOST/_codes/ddPCR/data/2026-02-25/"
ddpcr_files <- list.files(ddpcr_path, pattern = ".xlsx", full.names = TRUE)
# --- Processing Function ---
process_ddpcr <- function(file_path) {
# Extract Run ID from filename
run_id <- str_split( basename(file_path)," ",simplify = T)[,1]
read_xlsx(file_path,sheet = "raw data") %>%
dplyr::rename(Concentration=`Concentration (copies/ul)`)|>
# Use the column names confirmed by your dput
select(Sample, Target, `Concentration`, Status) %>%
mutate(
# FIX: Force Concentration to numeric to prevent the 'bind_rows' error
Concentration = as.numeric(Concentration),
Sample = as.character(Sample)
) %>%
# Filter out non-numeric samples and keep only valid statuses
# filter(grepl("SW",Sample), is.na(Status)) %>%
filter(grepl(paste(map_df$GlobalSpecimenID,collapse = "|"),Sample), is.na(Status)) %>%
# Pivot so targets become columns
pivot_wider(id_cols = Sample, names_from = Target, values_from = Concentration) %>%
mutate(RunID = run_id)|>
dplyr::rename(`ms tat Rev FAM`=`ms tat REV_FAM`,
`Gag HEX`=GAG_HEX)
}
# msTatRev_well = `ms tat Rev FAM` * VOL_PCR_RXN,
# skGag_well = `Gag HEX` * VOL_PCR_RXN,
ddpcr_data <- ddpcr_files %>%
map_df(~process_ddpcr(.x), .id = "runnum") %>%
mutate(`ddPCR ID#` = paste0(runnum, "-", Sample))
datatable(
ddpcr_data|>dplyr::select(RunID,runnum,Sample, `ddPCR ID#`,
`ms tat Rev FAM`, `Gag HEX`),
caption = 'Preprocessing Results',
filter = 'top',
options = list(pageLength = 10, autoWidth = TRUE, scrollX = TRUE),
rownames = FALSE
) %>%
formatRound(columns = 4:5, digits = 2) %>%
formatStyle(
c('ms tat Rev FAM', 'Gag HEX'),
backgroundColor = '#f0f8ff',
fontWeight = 'bold'
)Join and Calculate
Show the R code
# Using the ddpcr_data tibble you just generated
final_results <- map_df %>%
inner_join(ddpcr_data, by = c("GlobalSpecimenID"="Sample","run"="RunID")) %>%
dplyr::left_join(mymap_df|>dplyr::select(acronyms,location),by="acronyms")|>
dplyr::relocate(location,.after = acronyms)|>
mutate(
# cDNA Synthesis Calculation
# cDNA concentration = (RNA concentration * RNA volume) / total synthesis volume
cDNA_conc_ul = (RNA_conc_ng_ul * VOL_RNA_IN_CDNA) / TOTAL_VOL_CDNA,
# Concentration in the ddPCR reaction
cDNA_conc_ddpcr = (cDNA_conc_ul * VOL_CDNA_IN_PCR) / VOL_PCR_RXN,
# Mass of cDNA per well
ng_well_cDNA = cDNA_conc_ddpcr * VOL_PCR_RXN,
# Absolute copies per well
msTatRev_well = `ms tat Rev FAM` * VOL_PCR_RXN,
skGag_well = `Gag HEX` * VOL_PCR_RXN,
# Final Normalization to 1000ng
msTatRev_1000ng = (msTatRev_well / ng_well_cDNA) * 1000,
skGag_1000ng = (skGag_well / ng_well_cDNA) * 1000
)Final Results
Show the R code
dt_display <- final_results %>%
select(
run,GlobalSpecimenID, pid, location,
`RNA Input (ng/uL)` = RNA_conc_ng_ul,
`cDNA Stock (ng/uL)` = cDNA_conc_ul,
`cDNA in Well (ng)` = ng_well_cDNA,
`msTatRev (cp/uL)` = `ms tat Rev FAM`,
`skGag (cp/uL)` = `Gag HEX`,
`msTatRev / 1000ng` = msTatRev_1000ng,
`skGag / 1000ng` = skGag_1000ng
)
datatable(
dt_display,
caption = 'Final Processed Results (Use buttons below to download)',
filter = 'top',
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = list(
list(extend = 'copy', title = NULL),
list(extend = 'csv', title = paste0("ddPCR_Results_", Sys.Date())),
list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
list(extend = 'pdf', title = paste0("ddPCR_Results_", Sys.Date()))
),
pageLength = 10,
autoWidth = TRUE,
scrollX = TRUE
),
rownames = FALSE
) %>%
formatRound(columns = 5:11, digits = 2) %>%
formatStyle(
c('msTatRev / 1000ng', 'skGag / 1000ng'),
backgroundColor = '#f0f8ff',
fontWeight = 'bold'
)Analyses of Replicates
Data processing
Show the R code
process_ddpcr_diagnostics <- function(file_path) {
run_id <- str_split(basename(file_path), " ", simplify = TRUE)[, 1]
read_xlsx(file_path, sheet = "raw data") %>%
dplyr::rename(Concentration = `Concentration (copies/ul)`) %>%
select(Sample, Target, Concentration, Status) %>%
mutate(
Concentration = as.numeric(Concentration),
Sample = as.character(Sample)
) %>%
# Filter for individual manual replicates
filter(
grepl(paste(map_df$GlobalSpecimenID, collapse = "|"), Sample),
Status == "Manual"
) %>%
group_by(Sample, Target) %>%
summarise(
n_pos_reps = sum(Concentration > 0, na.rm = TRUE),
mean_conc = mean(Concentration, na.rm = TRUE),
sd_conc = sd(Concentration, na.rm = TRUE),
# CV% = (SD / Mean) * 100
cv_percent = (sd_conc / mean_conc) * 100,
.groups = "drop"
) %>%
mutate(
RunID = run_id,
# Flag suspicious results: only 1/3 positive or high CV%
reliability = case_when(
n_pos_reps == 3 ~ "High",
n_pos_reps == 2 ~ "Moderate",
n_pos_reps == 1 ~ "Low (Sporadic)",
TRUE ~ "Negative"
)
)
}
# Apply to all files
diagnostic_data <- ddpcr_files %>%
map_df(~process_ddpcr_diagnostics(.x))
final_diagnosis_pos <- map_df %>%
dplyr::select(run,GlobalSpecimenID,pid,acronyms)|>
inner_join(diagnostic_data, by = c("GlobalSpecimenID"="Sample","run"="RunID")) %>%
dplyr::left_join(mymap_df|>dplyr::select(acronyms,location),by="acronyms")|>
dplyr::relocate(location,.after = acronyms)|>
dplyr::mutate(n_target_pos=sum(n_pos_reps>0),.by = c(GlobalSpecimenID))|>
dplyr::mutate(group=case_when(n_target_pos==0~"Double Negative",
n_target_pos==1~"Single Positive",
n_target_pos==2~"Double Positive",
.default = NA_character_))|>
dplyr::filter(!group=="Double Negative")|>
dplyr::filter(n_pos_reps>0)|>
dplyr::mutate(status=case_when(pid%in%c("LG300","LG301","LG302","LG303")~"PWoH",
.default = "PWH"))
num_cols <- names(final_diagnosis_pos)[sapply(final_diagnosis_pos, is.numeric)]
datatable(
final_diagnosis_pos,
caption = 'Data with replicates',
filter = 'top',
# extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = list(
list(extend = 'copy', title = NULL),
list(extend = 'csv', title = paste0("ddPCR_Results_", Sys.Date())),
list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
list(extend = 'pdf', title = paste0("ddPCR_Results_", Sys.Date()))
),
pageLength = 10,
autoWidth = TRUE,
scrollX = TRUE
),
rownames = FALSE
) %>%
formatRound(columns = num_cols, digits = 2) Show the R code
# formatStyle(
# c('Mean % CV across replicates', 'Perc. with 1/3 positive only (sporadic)'),
# backgroundColor = '#f0f8ff',
# fontWeight = 'bold'
# )Diagnosis Summary
Show the R code
diag_display <- final_diagnosis_pos %>%
group_by(status,group,Target) %>%
summarise(
Total_Pos_Samples = n(),
`Median number of positive replicate` = median(n_pos_reps),
`Mean % CV across replicates` = mean(cv_percent, na.rm = TRUE),
`Perc. with 1/3 positive only (sporadic)` = mean(n_pos_reps == 1) * 100 # % of samples with only 1/3 pos
)
datatable(
diag_display,
caption = 'Diagnosis Summary',
filter = 'top',
# extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
# buttons = list(
# list(extend = 'copy', title = NULL),
# list(extend = 'csv', title = paste0("ddPCR_Results_", Sys.Date())),
# list(extend = 'excel', title = paste0("ddPCR_Results_", Sys.Date())),
# list(extend = 'pdf', title = paste0("ddPCR_Results_", Sys.Date()))
# ),
pageLength = 10,
autoWidth = TRUE,
scrollX = TRUE
),
rownames = FALSE
) %>%
formatRound(columns = 6:7, digits = 2) %>%
formatStyle(
c('Mean % CV across replicates', 'Perc. with 1/3 positive only (sporadic)'),
backgroundColor = '#f0f8ff',
fontWeight = 'bold'
)Visualizations
Coefficients of Variation across Replicates
Show the R code
ggplot(final_diagnosis_pos, aes(x = as.factor(n_pos_reps), y = cv_percent, color = status)) +
geom_boxplot(alpha = 0.3) +
geom_jitter(position = position_jitterdodge(), alpha = 0.6) +
geom_hline(yintercept = 173.2, linetype = "dashed", color = "red") +
annotate("text", x = 1.5, y = 180, label = "Theoretical Limit (1/3 pos)", color = "red") +
labs(
title = "Variance Analysis: PWH vs PWoH",
x = "Number of Positive Replicates (out of 3)",
y = "CV (%)",
subtitle = "CV% near 173% indicates stochastic/single-well detection"
) +
theme_minimal()Show the R code
# Create a contingency table of robustness
# table_robust <- table(final_diagnosis_pos$status, final_diagnosis_pos$n_pos_reps == 3)
#
# # Fisher's Exact Test: Is PWH more likely to have 3/3 replicates than PWoH?
# fisher.test(table_robust)Detection Threshold?
I am calculating the 95th percentile of the concentration found in the PWoH group. Any PWH sample with a concentration above this value can be considered a “True Positive” with 95% confidence.
Show the R code
# --- Calculate the Noise Threshold per Target ---
thresholds <- final_diagnosis_pos %>%
filter(status == "PWoH") %>%
group_by(Target) %>%
summarise(
# Use 95th percentile of the mean concentration in negatives
limit_of_noise = quantile(mean_conc, 0.95, na.rm = TRUE),
max_noise = max(mean_conc, na.rm = TRUE),
n_controls = n()
)
# print(thresholds)
ggplot(final_diagnosis_pos, aes(x = status, y = mean_conc, color = status)) +
geom_boxplot(outlier.shape = NA, alpha = 0.2) +
geom_jitter(width = 0.2, alpha = 0.6) +
# Add the threshold line (assuming Gag_HEX for this example)
geom_hline(data = thresholds, aes(yintercept = limit_of_noise),
linetype = "dashed", color = "red") +
facet_wrap(~Target, scales = "free_y") +
scale_y_log10() + # Use log scale because ddPCR data spans orders of magnitude
labs(
title = "Defining the Assay Detection Threshold",
subtitle = "Red line = 95th percentile of PWoH (Noise Floor)",
y = "Concentration (copies/µL)",
x = "Group"
) +
theme_minimal()